//	COPYRIGHT (C) 1981 BY BOARD OF TRUSTEES,
//	LELAND STANFORD JUNIOR UNIVERSITY

$LIST

let MSDF() be $(msdf
 /* 
	This function has to create, or augment, a simple data-base
  containing ion-compositions and definitions of substructures that
  purport to be representations of constitutions of those ions.

	Currently, there is very little error checking.

  MODS JAN 80. Allow "MODE" to be "ION" or "LOSS", so that one may
 define substructures leading to characteristic losses.


  MODS Mid-Jan 80, Satellite peaks used to confirm substructures.


  Mods late-Jan 80,
	Different file formats; the problem was several ion compositions
 all implying the same substructure, resulting in many duplicate entries
 of structure tables, plus the inconvenience of specifying ion+satellites
 for each of the different ions in the group.

*/

manifest $( MAXNUM = 40 $);

/* MAXNUM is the maximum number of ions/losses in a 'group'; it seems easier
 to allocate arrays this way than to use lists etc.
*/

static $(
ATNAMES = NIL // Vector containing (pointers to) names of atoms.
COMPS = NIL  // Vector of composition vectors, one for each ion/loss of the group.
FILEUNDER = NIL // Vector of true/false indicators, used to determine
		// whether the substructure is to be registered under any
		// particular one of the compositions given for it.
INDB = NIL   // File pointer when reading data-base
IONLOWS = NIL // Low limit on ion/loss intensity, one entry per peak.
IONHIHS = NIL // High limit on ion/loss intensity, one entry per peak.
MASSES = NIL   // Nominal mass of each ion/loss being defined by user
MAZ = NIL    // Nominal masses (integer) for each of different atom
	     // types, (normally use mass of lowest mass isotope if
             // dealing with things like chlorine etc).
MF = NIL     // Cursor used when running along ALLDEFS list in various different functions.
MODE = NIL   // Either "ION" or "LOSS".
NEXTMASS = NIL // Next mass in data file
NTYPES = NIL // The number of different atom types defined in ALLDEFS
NUM = NIL // Vector giving the composition most recently unpacked by GETCMP
          // (from a line of the reference file or from a line typed by the user).
NUMPEAKS = NIL // Number of ions/losses in next group.
OIN = NIL 	// OIN and OOUT used to hold the previous INPUT and OUTPUT streams
OOUT = NIL 	// while reading/writing to some file.
PEAK = NIL      // Current PEAK being considered (from set of NUMPEAKS)
SC2  = NIL	// File pointer for temporary file
SUBNAME = NIL   // Substructure for which evidence is being given.
QQSTR = "MSI-HELP" // Response to ?? entered by user, (eventually this
        	// is intended to be a key into a real "HELP" file).
$)

let GETCMP(MSG) = valof $(gtcmp
  /* GETCMP unpacks the composition of one ion, or of one possible
     neutral loss, from the current line of input items (the line has
     already been read by the calling routine, e.g. using LINEIN or a
     prompt).

     The line is taken to be a sequence of atom names, each optionally
     followed by a count (a missing count means 1), ending at an
     EOLTYPE or PSEOLTYPE item.  Counts are accumulated into
     NUM!1 .. NUM!NTYPES, indexed in step with ATNAMES, so an atom
     named twice has its counts added together.

     "MSG" is TRUE if warning messages should be printed (and the rest
     of the line flushed) when the data is bad; when FALSE the routine
     fails silently.

     Returns TRUE if the data is valid, otherwise FALSE.  On failure
     NUM holds only those atoms unpacked before the fault was found.
   */

  static $( ATNAME = NIL; DTYPE = NIL $);

  // Start from an empty composition.
  for a=1 to NTYPES do NUM!a:=0;

atom:   
  // Each element of the composition must begin with a name.
  unless NEXTIS(STRTYPE) do $(bad
      unless MSG do resultis FALSE
      OUTS("*C*LAtom name missing.*C*L");         
      FLUSHLINE();
      resultis FALSE
     $)bad
  ATNAME:=LOPITEM();

  // The name has to be one currently defined with type "ATOM".
  DTYPE:=DEFTYPEOF(ATNAME);
  if (DTYPE = 0) | (NOT (STREQUAL("ATOM",STROFNUM(DTYPE)))) then $(bad
         unless MSG do resultis FALSE
         OUTSNUM(ATNAME);
         OUTS(" is not an atom.*C*L");
         FLUSHLINE();
         resultis FALSE
        $)bad

  // Find the slot for this atom and add in its count (default 1).
  for A=1 to NTYPES do
      if ATNAME = ATNAMES!A then $(found
               NUM!A+:=(NEXTIS(NUMTYPE)->LOPITEM(),1);
               goto done
               $)found

  // A defined atom, but not one of those recorded in ATNAMES.
  unless MSG do resultis FALSE
  // ATNAME is a name number (it is handed to DEFTYPEOF and compared
  // with ATNAMES entries), so it is printed with OUTSNUM exactly as in
  // the "is not an atom" message above, not with OUTS.
  OUTSNUM(ATNAME);
  OUTS(" is not part of molecular composition!*C*L");
  FLUSHLINE()
  resultis FALSE;


done: 
     // End of line means the whole composition has been unpacked,
     // otherwise go round again for the next atom.
     if NEXTIS(PSEOLTYPE) | NEXTIS(EOLTYPE) then resultis TRUE;
     goto atom
 $)gtcmp
let COPYSUB(SNAME) be $(CPYSB
 /* COPYSUB writes the definition of the substructure SNAME (a name
    number) to the current output stream.

    It opens the file TOPFILENAME, looks for the chunk headed by
    ESHEADSTR and, within it, the entry whose name is STROFNUM(SNAME).
    The name and a newline are written out, followed by whatever
    COPYSEG(ESSEP,FALSE) copies, and the file is then closed again.

    NOTE(review): presumably COPYSEG copies the remainder of the entry
    up to the next ESSEP separator, and there is no check that either
    FINDSEG call actually found anything -- confirm against those
    helpers.
  */
 OPENIN(TOPFILENAME)
 FINDSEG(CHUNKSEP,ESHEADSTR)
 FINDSEG(ESSEP,STROFNUM(SNAME))
 OUTSNUM(SNAME)
 NEWLINE(1)
 COPYSEG(ESSEP,FALSE)
 CLOSEIN()
 $)CPYSB



let COPYTOFILEPOS() BE $(FNNS
 /* COPYTOFILEPOS copies the data-base from INPUT to OUTPUT up to the
    place where the entry for the current PEAK belongs, and leaves
    NEXTMASS set to the last mass read from the file.

    It stops in one of two states:

      NEXTMASS = MASSES!PEAK  an entry with the same mass and the same
                              composition already exists; its mass and
                              composition lines have been copied out,
                              so further alternatives can be added.

      otherwise               NEXTMASS is either the terminating zero
                              (or less) or a mass greater than
                              MASSES!PEAK.  That mass has NOT been
                              written out; a new entry is to be
                              written in front of it.
  */
static $( SAMECOMP = NIL $)

$(SCAN
 // Copy through to the next segment of the current MODE and read the
 // mass that follows its header.
 COPYSEGSTO(ESSEP,MODE,TRUE);
 LITEMS!LPOSN:=EOLTYPE;
 NEXTMASS:=GETNONNEGINT("",QQSTR,FALSE);

 // The list of masses is terminated by a zero.
 if NEXTMASS LE 0 then break;

 // A heavier entry: the new one must be inserted before it.
 if NEXTMASS GR MASSES!PEAK then break;

 // Same mass or lighter, in both cases the mass line is copied out.
 OUTNOL(NEXTMASS);

 unless NEXTMASS = MASSES!PEAK do $(LIGHTER
    // Lighter than the one wanted, copy the rest of its chunk.
    COPYSEG(CHUNKSEP,TRUE);
    loop
    $)LIGHTER

 // Masses agree, so read and unpack the composition line.
 LINEIN("");
 SAMECOMP:=GETCMP(FALSE);

 // Write out whatever GETCMP left in NUM.  This is done whether or not
 // the line unpacked properly.
 // NOTE(review): when GETCMP fails NUM holds only the atoms unpacked
 // before the fault, so the line written here can differ from the line
 // that was read -- confirm this is acceptable.
 for N=1 to NTYPES do
    unless NUM!N=0 do $( OUTSNUM(ATNAMES!N); SPACES(1); OUTNOS(NUM!N) $);
 NEWLINE(1);

 // A line that unpacked properly matches only if every atom count
 // agrees with the composition of the current peak.
 if SAMECOMP then
    for A=1 to NTYPES do
       SAMECOMP:=SAMECOMP & (NUM!A=[COMPS!PEAK]!A);

 // Found the existing entry for this composition.
 if SAMECOMP then break;

 // Bad line or a different composition: copy the rest of this chunk
 // and carry on with the next one.
 COPYSEG(CHUNKSEP,TRUE)
$)SCAN repeat

$)FNNS

let GETMASSES() be $(gtms
 /* GETMASSES runs along the ALLDEFS list and, for every definition
    whose type is "ATOM", records the atom's name in ATNAMES and its
    nominal mass in MAZ.  Entries are stored from index 1 upwards in
    the order in which the atoms are met on the list.
  */
 static $( FILLED = NIL $);

   /* ATMASS gives the nominal mass for atom name A.  C, N, O and H
      are built in; for anything else the user is asked to supply an
      integral mass.
    */
   let ATMASS(A) = valof $(mass
      let SYMBOLS = TABLE 0,"C","N","O","H"
      let WEIGHTS = TABLE 0,12,14,16,1

      for i=1 to 4 do
        if STREQUAL(STROFNUM(A),SYMBOLS!i) then resultis WEIGHTS!i;

      // Not one of the standard atoms, so ask.
      OUTS("Atom type ");
      OUTSNUM(A);
      OUTS(" is new.");

      resultis GETPOSINT("*C*LIntegral mass : ",QQSTR,FALSE)
      $)mass

    FILLED:=0;
    MF:=ALLDEFS;
    until MF = @NULL do $(scan
       let DEFNAME = CAR(CAR(MF))
       let DEFKIND = DEFTYPEOF(DEFNAME)

       // Only definitions having a type, and that type "ATOM", count.
       if (DEFKIND NE 0) & STREQUAL("ATOM",STROFNUM(DEFKIND)) then $(atom
          FILLED+:=1;
          ATNAMES!FILLED:=DEFNAME;
          MAZ!FILLED:=ATMASS(DEFNAME)
          $)atom

       MF:=CDR(MF)
       $)scan
    $)gtms


let GETSUBSTRUCTURE() = valof $(GTSB
 STATIC $( DNAME = NIL; OK = NIL; OLD = NIL $);
/* Have to get user to identify substructure for which he wants to
define an ion pattern (loss pattern). Then look in the database
for a substructure of this name, if its there acknowledge and continue.

If the substructure is not already in the database, see if we have one
defined with that name, if so get it into the database.

The chosen name is left in SUBNAME.

Returns TRUE once a usable substructure has been identified.
Returns FALSE only if no substructure name is given.  A name that
cannot be used (it is in the data base as something other than a
substructure, or it is not currently defined as a substructure) causes
the prompt to be repeated.
*/
LP:
unless CONDPROMPT("Substructure :",0,
   [TABLE 2,"The name of a substructure, that your have previously defined,*C*L",
   	"which represents a possible constitution for this set of ions/losses.*C*L"],
	QQSTR,STV)
  do resultis FALSE
SUBNAME:=LOPITEM()


/* See if its in the Data-base already.  INPUT is switched to the
data-base file for the search, and the user's line of items is put
aside with SWAPLITEMS while lines of the file are being read.
*/
OLD:=FALSE; OK:=FALSE;
OIN:=INPUT;
SWAPLITEMS();
INPUT:=FINDFILE("DSK",PROMPTFILENAME,PROMPTFILEEXT);
FINDSEG(CHUNKSEP,ESHEADSTR);
$(RPT
	SKIPSEG(ESSEP);
	LINEIN("");
	/* The set of substructures will terminate with a blank item. */
	if NEXTIS(EOLTYPE) then break;
	DNAME:=LOPITEM();
	if SUBNAME=DNAME then $(found
		OLD:=TRUE
		/* Have found something in that library with that name,
		best check its a substructure.
		*/
		LINEIN("");
		test STREQUAL(STROFNUM(LOPITEM()),"SUBSTRUCTURE") then $(ok
			/* Something of that name is in the library, he can't change
			it, just state its being found.
			*/
			OUTSNUM(DNAME); OUTS(", existing entry in Data Base.*C*L")
			OK:=TRUE
			$)ok
		or  $(bad
			
			OUTSNUM(DNAME); OUTS(", exists in the Data Base, but not as a substructure.*C*L")
			OUTS("So, sorry but you can't reuse that name.*C*L")
			
	            $)bad
		break
		$)found
$)RPT REPEAT;
LITEMS!LPOSN:=EOLTYPE
SWAPLITEMS();
ENDREAD(INPUT);
INPUT:=OIN;

if OLD then $(
	/* Already there as a substructure, nothing more to do. */
	if OK then resultis TRUE
	/* The name is taken by something else.  Returning FALSE here would
	make the caller give up altogether, as though no name had been
	given, so ask again instead, in the same way as for a name that
	is not defined as a substructure (below).
	*/
	FLUSHLINE();
	goto LP
	$)

/* This substructure doesn't appear to be in the data base, maybe its
something new, see if currently defined as a substructure.
*/

unless DEFTYPEOF(SUBNAME)=NUMOFSTR("SUBSTRUCTURE") do
	$( OUTSNUM(SUBNAME); OUTS(" is not defined as a substructure.*C*L");
	   FLUSHLINE();
	   goto LP
	$)

/* Ok, its new, acknowledge that, then get it copied into the
data-base.
*/
OUTSNUM(SUBNAME); OUTS(", new entry for MS Data Base.*C*L");

/* The data-base is rewritten into a scratch file.
NOTE(review): the two COPYSEGSTO calls presumably copy everything up to
the substructure chunk and then the existing substructure entries up to
the blank terminating item, so that the new definition goes in at the
end of that set -- confirm against COPYSEGSTO.
*/
OIN:=INPUT;
INPUT:=FINDFILE("DSK",PROMPTFILENAME,PROMPTFILEEXT);
OOUT:=OUTPUT;
OUTPUT:=CREATEFILE("DSK",SC2FILENAME(),CGEXT);
SWAPLITEMS()
COPYSEGSTO(CHUNKSEP,ESHEADSTR,TRUE);
COPYSEGSTO(ESSEP,"",FALSE);
COPYSUB(SUBNAME);
OUTCH(ESSEP);
NEWLINE(1)
COPYTOEND();
LITEMS!LPOSN:=EOLTYPE
SWAPLITEMS()
ENDREAD(INPUT);
ENDWRITE(OUTPUT);
INPUT:=OIN;
OUTPUT:=OOUT;

/* Replace file with updated version.  Interrupts are held off while
this is done, as they are for the same replacement in SAVEIONS.
*/
INTERRUPTABLE(FALSE)
FILEREPLACE(PROMPTFILENAME,PROMPTFILEEXT,SC2FILENAME(),CGEXT);
INTERRUPTABLE(TRUE)

resultis TRUE

$)GTSB




let SETUP() = valof $(STP
/* 
 Setting up for MS file system requires little more than finding,
 or creating, a file for the data-base.

 The user is asked for the data-base file; its name is left in
 PROMPTFILENAME/PROMPTFILEEXT for the other routines.  If no such file
 exists one is created from the file INIT.GLI: the part of INIT.GLI
 before its ESHEADSTR chunk is copied, then a title entry holding a
 header line typed by the user, an empty "LOSS" list and an empty "ION"
 list (each holding just the terminating zero mass) are written, and
 finally the ESHEADSTR chunk and the rest of INIT.GLI are copied.

 The atom names and masses are then collected by GETMASSES.

 Returns FALSE if no file is given, otherwise TRUE.
*/

// OIN and TFILE are private to SETUP (this OIN is not the shared one).
static  $(  OIN = NIL; TFILE = NIL $)

  unless PROMPTFORFILE("Which is the file containing your Mass Spec Database : ",
	QQSTR)
	do resultis FALSE

  unless FILEEXISTS(PROMPTFILENAME,PROMPTFILEEXT) do $(
    // Insist on a non-empty header line before creating anything.
    FLUSHLINE()
    LINEIN("Header data for file : ") repeatwhile NEXTIS(EOLTYPE);

    // Redirect both streams: read INIT.GLI, write the new data-base.
    OIN:=INPUT; OOUT:=OUTPUT;
    TFILE:=CREATEFILE("DSK",PROMPTFILENAME,PROMPTFILEEXT)
    INPUT:=FINDFILE("DSK","INIT","GLI",MAKPPN)
    OUTPUT:=TFILE;
    COPYSEGSTO(CHUNKSEP,ESHEADSTR,FALSE);

    // Title entry.
    // NOTE(review): LINEOUT presumably writes out the header line just
    // read by LINEIN -- confirm.
    OUTS("MSDATABASE-TITLE*C*L");
    LINEOUT()
    LITEMS!LPOSN:=EOLTYPE

    // Empty LOSS and ION lists, each ended by a zero mass.
    OUTCH(CHUNKSEP); OUTCH(ESSEP); OUTS("LOSS");
    OUTS("*C*L0*C*L");
    OUTCH(CHUNKSEP); OUTCH(ESSEP); OUTS("ION");
    OUTS("*C*L0*C*L");

    // Header of the substructure chunk, then the remainder of INIT.GLI.
    OUTCH(CHUNKSEP);
    OUTS(ESHEADSTR); NEWLINE(1)
    COPYTOEND();

    ENDREAD(INPUT);
    ENDWRITE(OUTPUT);
    INPUT:=OIN; OUTPUT:=OOUT	    
    $)

 GETMASSES()

resultis TRUE
$)STP
let INTENSRANGE() be $(irng
 /* INTENSRANGE asks the user for the allowed range of relative
    intensity for the current PEAK and stores the two limits in
    IONLOWS!PEAK and IONHIHS!PEAK.

    The low limit must lie in 1-99 and the high limit must be greater
    than the low limit and no more than 100.  After any bad answer the
    questions start again from the low limit.
  */
 static $( LOLIM = NIL; HILIM = NIL $);

 $(ask
   LOLIM:=
       GETPOSINT("Min Intens. : ","Low limit of allowed intensity range  : ", FALSE);
   unless (LOLIM>0) & (LOLIM<100) do $(badlow
   	   OUTS("I want an integer in range 1-99 here for relative %intensity of the ion.*C*L");
   	   loop
   	   $)badlow

   HILIM:=
       GETPOSINT("Max Intens. : ","High limit of allowed intenisty range : ", FALSE);

   // Both limits acceptable, finished asking.
   if (HILIM>LOLIM) & (HILIM LE 100) then break;

   OUTS("I want a value, greater than the low limit, and less than or*C*L");
   OUTS("equal to 100 for relative %intensity of the ion.*C*L")
 $)ask repeat

 IONLOWS!PEAK:=LOLIM;
 IONHIHS!PEAK:=HILIM
$)irng


let SAVEIONS() be $(svn
/* SAVEIONS enters the current group of ions/losses into the data-base.

 For every peak of the group that the user asked to have registered
 (FILEUNDER!PEAK), the data-base file is copied into a scratch file
 with a new record inserted under that peak's mass and composition,
 and the scratch file then replaces the data-base.

 The record consists of the substructure name, the intensity range of
 the peak, the number of other peaks in the group, and one line for
 each of those other peaks (its mass, composition and intensity range).

 Uses MODE, SUBNAME, NUMPEAKS, MASSES, COMPS, IONLOWS, IONHIHS and
 FILEUNDER as set up by GETSUBSTRUCTURE and GETIONS; PEAK is used as
 the loop variable because COPYTOFILEPOS works on MASSES!PEAK and
 COMPS!PEAK.
*/

/* OBVIOUSLY, A LITTLE REPROGRAMMING APPROPRIATE HERE, CURRENTLY
 WE MAKE ONE PASS THROUGH THE FILE FOR EACH PEAK IN THE PEAKGROUP.
 IF PEAKS WERE SORTED BY MASS/COMPOSITION, THEN COULD PROBABLY ENTER
 THEM ALL IN ONE PASS.
*/

PEAK:=0;
UNTIL PEAK=NUMPEAKS DO $(NEXTPEAK
PEAK+:=1;
/* Peaks not to be registered only appear as satellites of the others. */
unless FILEUNDER!PEAK do loop;

/* Switch to reading the data-base and writing the scratch file. */
OIN:=INPUT; OOUT:=OUTPUT
INDB:=FINDFILE("DSK",PROMPTFILENAME,PROMPTFILEEXT)
SC2:=CREATEFILE("DSK",SC2FILENAME(),CGEXT)

SWAPLITEMS()
INPUT:=INDB
OUTPUT:=SC2


/* Have to find where to store current composition (LOSS or ION), using
COPYTOFILEPOS(). 
Then if this is a new entry, write out mass composition etc.

*/

COPYTOFILEPOS()


unless NEXTMASS=MASSES!PEAK do $(mscmp
	/* If this is a new ion/loss, have to write out its mass and composition. */
	OUTNOL(MASSES!PEAK);
	for N=1 to NTYPES do
		unless [COMPS!PEAK]!N=0 do $(
				OUTSNUM(ATNAMES!N); SPACES(1); 
				OUTNOS([COMPS!PEAK]!N) $)
        NEWLINE(1)
	$)mscmp
	
/* NOW, Write out new entry as substructure name, intensity range, then number of
satellite ions + compositions of satellites etc.
*/


OUTSNUM(SUBNAME); NEWLINE(1);
/* Intensity range. */
OUTNOS(IONLOWS!PEAK); OUTNOL(IONHIHS!PEAK)
/* Other ions of group. */
OUTNOL(NUMPEAKS-1);
/* NOTE(review): the satellite compositions are written with OUTNO and no
 SPACES between atoms, unlike the composition line above -- confirm that
 whatever reads these lines expects that form.
*/
for N=1 to NUMPEAKS do 
  unless N=PEAK do $(sats
	OUTNOS(MASSES!N); 
	for A=1 to NTYPES do 
	   unless [COMPS!N]!A=0 do $( OUTSNUM(ATNAMES!A); OUTNO([COMPS!N]!A) $);
        SPACES(1); OUTNOS(IONLOWS!N); OUTNOL(IONHIHS!N)
	$)sats
	
/* COPYTOFILEPOS stopped after reading the header and mass of the entry
 that is to follow a new one, without writing that mass out, so end the
 new chunk and write the next chunk's MODE header and mass here.
*/
unless NEXTMASS=MASSES!PEAK do $(
      NEWLINE(1);
      OUTCH(CHUNKSEP)
      OUTCH(ESSEP); OUTS(MODE); NEWLINE(1);
      OUTNOL(NEXTMASS)
    $)

/* The remainder of the data-base is copied unchanged. */
COPYTOEND()
ENDREAD(INPUT);
ENDWRITE(OUTPUT);
SWAPLITEMS()
INPUT:=OIN; 
OUTPUT:=OOUT


/* Replace file with updated version. */

INTERRUPTABLE(FALSE)
FILEREPLACE( PROMPTFILENAME,PROMPTFILEEXT, SC2FILENAME(),CGEXT)
INTERRUPTABLE(TRUE)
$)NEXTPEAK

$)svn


let GETIONS() = valof $(gtns
/* GETIONS gets from the user the description of one group of peaks.

 The user chooses the MODE ("ION" or "LOSS") and says how many peaks
 there are (at most MAXNUM).  Then for each peak in turn the
 composition is read and stored in COMPS!PEAK, the nominal mass is
 worked out from MAZ and stored in MASSES!PEAK, the intensity range is
 obtained by INTENSRANGE, and the user says whether the substructure
 is to be registered under that composition (FILEUNDER!PEAK).

 Returns TRUE if the whole group was read and at least one peak is to
 be registered, otherwise FALSE.
*/
static $( CHOICE = NIL; ANYFILED = NIL $)

CHOICE:=PROMPTSELECT("MODE (ION/LOSS) : ","ION => ION COMPOSITIONS, LOSS => LOSS COMPOSITIONS",0,
	QQSTR,[TABLE 2,"ION",1, "LOSS", 2, 0],FALSE);
test CHOICE=1 then MODE:="ION"
or test CHOICE=2 then MODE:="LOSS"
   or resultis FALSE

NUMPEAKS:=GETPOSINT("Number of Peaks : ",
	"Number of compositions for observed ions/losses.*C*L",FALSE);
if NUMPEAKS LE 0 then resultis FALSE
if NUMPEAKS GR MAXNUM then $(toomany
	OUTS("Sorry, can't handle that many.*C*L");
	resultis FALSE
	$)toomany

ANYFILED:=FALSE;
PEAK:=0;
/* NUMPEAKS is at least 1 here, so the body is always wanted once. */
$(EACHPK
  PEAK+:=1;

  unless CONDPROMPT("Composition : ", 0, 
	[TABLE 2,"A list of atom names and numbers seperated by spaces or",
	"commas, 1s may be omitted."],  "Composition of next peak : ",STV)
  do resultis FALSE
  unless GETCMP(TRUE) do resultis FALSE

  /* Keep a copy of the composition and total up the nominal mass. */
  $(keep
    let ROW = COMPS!PEAK
    let TOTAL = 0
    for N=1 to NTYPES do $(
        ROW!N:=NUM!N;
        TOTAL+:=(NUM!N)*(MAZ!N)
        $)
    MASSES!PEAK:=TOTAL
  $)keep

  INTENSRANGE();

  FILEUNDER!PEAK:=
	YESNO("Register ? : ", 
		"Should the substructure be registered under this composition", "NO");
  ANYFILED:=ANYFILED | (FILEUNDER!PEAK)
$)EACHPK repeatwhile PEAK NE NUMPEAKS

  /* Pointless to go on if there is no peak to register it under. */
  unless ANYFILED do $( OUTS("??? Not to be registered?*C*L"); resultis FALSE $)
  resultis TRUE
$)gtns


  /* Main body of MSDF.

     Count the atom types defined in ALLDEFS, allocate the working
     vectors, find (or create) the data-base and then repeatedly get a
     substructure and a group of ions/losses for it, saving each group
     in the data-base, until the user has had enough.  All the vectors
     are released again on the way out.
   */

  // The test for an atom is the one used by GETMASSES, including the
  // check for a name with no type, so that NTYPES is exactly the
  // number of entries GETMASSES will put into ATNAMES and MAZ.
  NTYPES:=0;
  MF:=ALLDEFS;
  until MF=@NULL do $(count
        let DEFKIND = DEFTYPEOF(CAR(CAR(MF)))
        if (DEFKIND NE 0) & STREQUAL("ATOM",STROFNUM(DEFKIND)) then NTYPES+:=1
	MF:=CDR(MF) $)count;

  // Vectors with one entry for each atom type.
  ATNAMES:=NEWVEC(NTYPES);
  MAZ:=NEWVEC(NTYPES);
  NUM:=NEWVEC(NTYPES);

  // Vectors with one entry for each peak of a group; each entry of
  // COMPS is itself a composition vector.
  COMPS:=NEWVEC(MAXNUM);
  MASSES:=NEWVEC(MAXNUM);
  IONHIHS:=NEWVEC(MAXNUM);
  IONLOWS:=NEWVEC(MAXNUM);
  FILEUNDER:=NEWVEC(MAXNUM);
  for N=1 to MAXNUM do COMPS!N:=NEWVEC(NTYPES);

  unless SETUP() do goto CLEAN1

  $(rpt
  // No substructure name given means the user has finished.
  unless GETSUBSTRUCTURE() goto CLEAN1;

  if GETIONS() then SAVEIONS(); 
  $)rpt REPEATWHILE YESNO("Another Ion ? : ",QQSTR,"NO")

CLEAN1:

  // The composition vectors have to go before COMPS itself.
  for N=1 to MAXNUM do FREEVEC(COMPS!N);

  FREEVEC(FILEUNDER);
  FREEVEC(IONLOWS);
  FREEVEC(IONHIHS);
  FREEVEC(MASSES);
  FREEVEC(COMPS);

  FREEVEC(NUM);
  FREEVEC(MAZ);
  FREEVEC(ATNAMES)

$)msdf


